require(foreign)
#require(RCurl)
require(sandwich)
source("robust_summary.R")
source("BM_StandardErrors.R")

d2 <- read.dta("base02.dta")
d1 <- read.dta("base01.dta")
d0 <- read.dta("base00.dta")

d2$year <- 2
d1$year <- 1
d0$year <- 0

d2 <- d2[d2$boy == 0, ]
d1 <- d1[d1$boy == 0, ]
d0 <- d0[d0$boy == 0, ]

d2$ls25 <- as.numeric(d2$lagscore < quantile(d2$lagscore, 0.25, type = 2))
d2$ls50 <- as.numeric(d2$lagscore < quantile(d2$lagscore, 0.5, type = 2) & d2$ls25 == 0)
d2$ls75 <- as.numeric(d2$lagscore < quantile(d2$lagscore, 0.75, type = 2) & d2$ls25 == 0 & d2$ls50 == 0)
d2$ls100 <- as.numeric(d2$ls25 + d2$ls50 + d2$ls75 == 0)

d1$ls25 <- as.numeric(d1$lagscore < quantile(d1$lagscore, 0.25, type = 2))
d1$ls50 <- as.numeric(d1$lagscore < quantile(d1$lagscore, 0.5, type = 2) & d1$ls25 == 0)
d1$ls75 <- as.numeric(d1$lagscore < quantile(d1$lagscore, 0.75, type = 2) & d1$ls25 == 0 & d1$ls50 == 0)
d1$ls100 <- as.numeric(d1$ls25 + d1$ls50 + d1$ls75 == 0)

d0$ls25 <- as.numeric(d0$lagscore < quantile(d0$lagscore, 0.25, type = 2))
d0$ls50 <- as.numeric(d0$lagscore < quantile(d0$lagscore, 0.5, type = 2) & d0$ls25 == 0)
d0$ls75 <- as.numeric(d0$lagscore < quantile(d0$lagscore, 0.75, type = 2) & d0$ls25 == 0 & d0$ls50 == 0)
d0$ls100 <- as.numeric(d0$ls25 + d0$ls50 + d0$ls75 == 0)

d <- rbind(d2, d1, d0)

d$year02 <- as.numeric(d$year == 2)
d$year01 <- as.numeric(d$year == 1)
d$year00 <- as.numeric(d$year == 0)

d <- d[d$ls75 + d$ls100 == 1, ]

table(d$school_id, d$year02)
table(d$school_id, d$year01)
table(d$school_id, d$year00)
d <- d[!(d$school_id %in% c(13, 24, 29)), ]
table(d$school_id, d$zakaibag)

f0 <- lm(zakaibag ~ I(treated * year01) + ls100 + year01 + year02 + as.factor(school_id), data = d)
summary(f0)
summary(f0, cluster = "school_id")
sqrt(vcovHC(f0)[2, 2])
BMlmSE(f0, as.factor(d$school_id))$se

tid <- unique(d$school_id[d$treated == 1])
cid <- unique(d$school_id[d$treated == 0])
id <- c(tid, cid)
theta <- rep(NA, length(unique(d$school_id)))

q1 <- length(tid)
q0 <- length(cid)
q <- q1 + q0

CRStest <- function(stats, alpha = .05, random = FALSE) {
	q <- length(stats)
	G <- t(unname(expand.grid(rep(list(c(1, -1)), q))))
	ts <- apply(G * stats, 2, function(x) mean(x)/sd(x))
	M <- length(ts)
	k <- ceiling(M * (1 - alpha))
	t.orig <- ts[1]
	ts <- sort(ts)
	as.numeric(t.orig > ts[k])
}



set.seed(48103)
n.matches <- 1000
matches <- replicate(n.matches, c(sample(tid, q1), sample(cid, q1)))

theta.crs <- rep(NA, q1)
res.crs <- rep(NA, n.matches)
res.crs2 <- rep(NA, n.matches)
for(l in 1:n.matches) {
	for(k in 1:q1) {
		ids <- matches[c(k, q1+k), l]
		theta.crs[k] <- coef(lm(zakaibag ~ I(treated * year01) + ls100 + year01 + year02 + as.factor(school_id), data = d[d$school_id %in% ids, ]))[2]
	}
	res.crs[l] <- CRStest(theta.crs)
	res.crs2[l] <- CRStest(theta.crs, alpha = .01)
}

# CRS at 5%
sum(res.crs)
# CRS at 1%
sum(res.crs2)


for (k in 1:q1) {
	theta[k] <- coef(lm(zakaibag ~ year01 + ls100 + year02, data = d[d$school_id == id[k], ]))[2]
}
for (k in (q1 + 1):(length(tid) + length(cid))) {
	theta[k] <- coef(lm(zakaibag ~ year01 + ls100 + year02, data = d[d$school_id == id[k], ]))[2]
}

tstat <- mean(theta[1:q1]) - mean(theta[(q1 + 1):q])
tstat

MakeRandIndex <- function(q1, q0, n.greps = 9999) {
	q <- q1 + q0
	index <- t(cbind(1:q, replicate(n.greps, sample(1:q, q))))
	index
}

OrdTest <- function(stats, index, q1, q0) {
	q <- q1 + q0
	M <- nrow(index)
	r.stats <- matrix(stats[index[1:M, ]], M, q)
	perm.stats <- rowMeans(r.stats[, 1:q1]) - rowMeans(r.stats[, (q1 + 1):q])
	perm.stats <- sort(perm.stats)
	perm.stats
}

M <- 10^5
idx <- MakeRandIndex(q1, q0, M)

perm.stats <- OrdTest(theta, idx, q1, q0)

# adjustment is negligible at q1 = 15 and q0 = 17
x99 <- perm.stats[M * 0.99]
x95 <- perm.stats[M * 0.95]
x90 <- perm.stats[M * 0.9]
y0 <- -200

pdf("rperm_al9.pdf", height = 3, width = 7)
par(mar = c(2, 2, 1, 2))
hist(perm.stats, breaks = 100, col = "grey", border = "white", main = "", xlab = "T(gX)")
segments(x90, y0, x90, 2300, lwd = 1.5, lty = "dotted")
segments(x95, y0, x95, 2020, lwd = 1.5, lty = "dotted")
segments(x99, y0, x99, 1720, lwd = 1.5, lty = "dotted")
rect(.115, 1290, .14, 1620, col = "white", border = "white")
segments(tstat, y0, tstat, 1320, lwd = 1.5)
#rect(.08, 2350, .2, 2600, col = "white", border = "white")
text(x = perm.stats[M * 0.9], y = 2390, "90%")
text(x = perm.stats[M * 0.95], y = 2120, "95%")
text(x = perm.stats[M * 0.99], y = 1840, "99%")
text(x = tstat, y = 1460, expression(paste(italic("T"), "(", widehat(theta)[n], ")")), cex = 1)
dev.off()